import numpy as np
import pandas as pd
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
import nxviz as nv
import folium
from itertools import combinations
from random import shuffle
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
/opt/anaconda3/lib/python3.8/site-packages/nxviz/__init__.py:18: UserWarning: nxviz has a new API! Version 0.7.4 onwards, the old class-based API is being deprecated in favour of a new API focused on advancing a grammar of network graphics. If your plotting code depends on the old API, please consider pinning nxviz at version 0.7.4, as the new API will break your old code. To check out the new API, please head over to the docs at https://ericmjl.github.io/nxviz/ to learn more. We hope you enjoy using it! (This deprecation message will go away in version 1.0.) warnings.warn(
# Load the interaction table, keep only the two edge-list columns and
# discard every row that is missing either endpoint.
df = pd.read_csv('HELA.csv')[['from_name', 'to_name']].dropna()
df
| from_name | to_name | |
|---|---|---|
| 3 | chr1:37919801-37921471 | chr1:38147315-38149061 |
| 4 | chr1:38085887-38088644 | chr1:38147315-38149061 |
| 5 | chr1:37468092-37469352 | chr1:38147315-38149061 |
| 6 | chr1:38068054-38069586 | chr1:38147315-38149061 |
| 7 | chr1:38026811-38028353 | chr1:38147315-38149061 |
| ... | ... | ... |
| 22712 | chr11:111647344-111648933 | chr11:111703636-111705109 |
| 22714 | chr6:7672758-7674487 | chr6:7721283-7724223 |
| 22715 | chr10:30959364-30960941 | chr10:30975965-30977665 |
| 22716 | chr4:144296011-144297714 | chr4:144319909-144321454 |
| 22717 | chr3:148672537-148674276 | chr3:148837361-148839459 |
11381 rows × 2 columns
def get_nw(df):
    """Build a networkx graph from an edge-list dataframe.

    Every row of ``df`` contributes one edge running from the value in the
    ``from_name`` column to the value in the ``to_name`` column.
    """
    graph = nx.from_pandas_edgelist(df, source='from_name', target='to_name')
    return graph
hela = get_nw(df)

# Betweenness centrality of every node, as a mapping node -> score.
hela_betweenness = nx.betweenness_centrality(hela)
# Optional export of the betweenness scores to a csv file for future work:
# with open('hela_betweenness.csv', 'w') as outfile:
#     for key, value in hela_betweenness.items():
#         outfile.write(key + ',')
#         outfile.write(str(value) + ',\n')

# Degree of every node, converted to a plain dict node -> degree.
hela_degrees = dict(hela.degree)
# Optional export of the degrees to a csv file for future work:
# with open('hela_degrees.csv', 'w') as outfile:
#     for key, value in hela_degrees.items():
#         outfile.write(key + ',')
#         outfile.write(str(value) + ',\n')

# Product of degree and betweenness centrality for every node.
pdb = {
    node: score * hela_degrees[node]
    for node, score in hela_betweenness.items()
}
# Split a network into its connected components.
def get_subgraphs(nw):
    """Return the connected components of ``nw`` as subgraphs, largest first.

    Each element is ``nw.subgraph(...)`` over one connected component; the
    list is ordered by descending node count.
    """
    components = [nw.subgraph(members) for members in nx.connected_components(nw)]
    components.sort(key=len, reverse=True)
    return components
# Connected components of the HeLa network, largest first.
hela_subs = get_subgraphs(hela)
def get_combinations(nw_subs):
    """Enumerate every connected 4-node subset inside the given subgraphs.

    Parameters
    ----------
    nw_subs : iterable of graphs
        Subgraphs to search, e.g. the list returned by ``get_subgraphs``.

    Returns
    -------
    dict
        Maps each 4-tuple of nodes whose induced subgraph is connected to the
        number of edges in that induced subgraph.

    Side effect: prints the number of connected 4-node subsets found.
    """
    combs_dict = dict()
    # Only the count is needed for the printout, so keep a counter instead of
    # a second copy of every node combination.
    n_found = 0
    for graph in nw_subs:
        # len(graph) is the number of nodes; a 4-node subset needs at least 4.
        if len(graph) < 4:
            continue
        # NOTE: brute force over all C(n, 4) node subsets of this subgraph.
        for quad in combinations(graph.nodes(), 4):
            induced = graph.subgraph(list(quad))
            # Keep only subsets whose induced subgraph is connected.
            if nx.is_connected(induced):
                n_found += 1
                combs_dict[quad] = induced.number_of_edges()
                # print(induced)
                # nx.draw(induced, with_labels = True)
                # plt.show()
    print(n_found)  # how many connected 4-node subsets are in the subgraphs
    return combs_dict
Reference:
Dey, A. K., Gel, Y. R., & Poor, H. V. (2019). What network motifs tell us about resilience and reliability of complex networks. Proceedings of the National Academy of Sciences of the United States of America, 116(39), 19368–19373. https://doi.org/10.1073/pnas.1819529116
The function below implements the idea of finding four-node motifs described in the article above. As defined in the article, there are 6 possible connected 4-node motifs in an undirected, unweighted graph (Dey et al., 2019).
def get_motifs(combs_dict, g):
    """Count how many 4-node subsets fall into each of the six motif types.

    Parameters
    ----------
    combs_dict : dict
        Maps a tuple of nodes to the number of edges in the subgraph they
        induce (the structure returned by ``get_combinations``).
    g : graph
        Graph used to look up node degrees inside each subset; only
        ``g.subgraph(nodes).degree()`` is called on it.

    Returns
    -------
    tuple
        ``(m1s, m2s, m3s, m4s, m5s, m6s)``:
        3 edges with a degree-3 node -> M1, 3 edges without -> M2,
        4 edges with a degree-3 node -> M3, 4 edges without -> M4,
        5 edges -> M5, 6 edges -> M6.
        Entries with any other edge count are ignored.
    """
    m1s = m2s = m3s = m4s = m5s = m6s = 0
    for nodes, edge in combs_dict.items():
        if edge == 5:
            m5s += 1
        elif edge == 6:
            m6s += 1
        elif edge == 3 or edge == 4:
            # Degrees are only needed to split the 3- and 4-edge cases, so the
            # induced subgraph is built for those entries alone.
            has_degree_three = any(
                degree == 3 for _, degree in g.subgraph(nodes).degree()
            )
            if edge == 3:
                if has_degree_three:
                    m1s += 1
                else:
                    m2s += 1
            elif has_degree_three:
                m3s += 1
            else:
                # no degree-3 node among 4 edges: the only motif left is M4
                m4s += 1
    return (m1s, m2s, m3s, m4s, m5s, m6s)
# Track how the number of motifs in the network holds up as batches of
# nodes are removed.
def get_status(nw, nw_subs, attack_node):
    """Record motif counts over 10 rounds of node removal.

    In each round the connected 4-node subsets of ``nw_subs`` are counted and
    classified, then the next batch of nodes from ``attack_node`` is removed
    from ``nw``. A batch is 5 percent (integer division by 20) of the node
    count ``nw`` had when the function was called.

    ``nw`` is modified in place, including a final batch removed after the
    last measurement. The caller's ``attack_node`` list is not modified.

    NOTE(review): ``nw_subs`` is never recomputed here, so the counts only
    change if those subgraphs reflect removals made on ``nw`` (presumably they
    are networkx subgraph views of ``nw`` -- confirm).

    Returns
    -------
    tuple
        ``(combs_lengths, combs_dict, m1s, m2s, m3s, m4s, m5s, m6s)`` where
        every element is a list with one entry per round; ``combs_dict`` holds
        the dictionaries returned by ``get_combinations``.
    """
    batch_size = len(nw) // 20
    snapshots = []        # get_combinations() result of every round
    snapshot_sizes = []   # number of connected 4-node subsets per round
    motif_series = tuple([] for _ in range(6))  # one list per motif M1..M6
    pending = attack_node
    for _ in range(10):
        found = get_combinations(nw_subs)
        snapshots.append(found)
        snapshot_sizes.append(len(found))
        # spread this round's six motif counts over the six series
        for series, count in zip(motif_series, get_motifs(found, nw)):
            series.append(count)
        # remove the next 5% of targeted nodes, then drop them from the queue
        for target in pending[:batch_size]:
            nw.remove_node(target)
        pending = pending[batch_size:]
    return (snapshot_sizes, snapshots, *motif_series)
Random Attack
# Removal order for the random attack: every node, randomly shuffled.
shuffled_nodes = list(hela.nodes)
shuffle(shuffled_nodes)
# Run the attack with the shuffled nodes as the removal order and keep the
# per-round results for the visualizations further down.
(
    rndm_lengths, rndm_dict,
    rndm_m1s, rndm_m2s, rndm_m3s, rndm_m4s, rndm_m5s, rndm_m6s,
) = get_status(hela, hela_subs, shuffled_nodes)
18093 14648 11865 9453 7542 5947 4655 3419 2460 1720
# Sanity check: per-round counts of motifs M1..M6 for the random attack,
# followed by the per-round number of connected 4-node subsets.
print(rndm_m1s, rndm_m2s, rndm_m3s, rndm_m4s, rndm_m5s, rndm_m6s)
print(rndm_lengths)
[4270, 3421, 2744, 2178, 1761, 1391, 1088, 824, 571, 405] [8999, 7250, 5878, 4714, 3786, 2959, 2283, 1711, 1239, 860] [3493, 2882, 2352, 1843, 1432, 1144, 923, 643, 473, 330] [483, 392, 319, 254, 200, 157, 110, 79, 54, 34] [755, 626, 504, 408, 324, 261, 220, 140, 108, 80] [93, 77, 68, 56, 39, 35, 31, 22, 15, 11] [18093, 14648, 11865, 9453, 7542, 5947, 4655, 3419, 2460, 1720]
Betweenness-based Attack
# Rebuild the network and its components: get_status() removes nodes in place.
hela = get_nw(df)
hela_subs = get_subgraphs(hela)
# (node, betweenness) pairs ordered from highest to lowest betweenness
sorted_hela_betweenness = sorted(
    hela_betweenness.items(), key=lambda pair: pair[1], reverse=True
)
# keep only the nodes, in that order
sorted_betw_nodes = [name for name, _score in sorted_hela_betweenness]
# Run the attack with nodes removed in descending order of betweenness and
# keep the per-round results for the visualizations further down.
(
    betw_lengths, betw_dict,
    betw_m1s, betw_m2s, betw_m3s, betw_m4s, betw_m5s, betw_m6s,
) = get_status(hela, hela_subs, sorted_betw_nodes)
18093 8386 4738 2475 1176 595 248 50 8 3
# Per-round counts of motifs M1..M6 for the betweenness attack, followed by
# the per-round number of connected 4-node subsets.
print(betw_m1s, betw_m2s, betw_m3s, betw_m4s, betw_m5s, betw_m6s)
print(betw_lengths)
[4270, 1755, 971, 423, 162, 57, 18, 0, 0, 0] [8999, 4078, 2176, 1103, 499, 247, 73, 8, 0, 0] [3493, 1769, 1077, 618, 317, 162, 57, 1, 0, 0] [483, 300, 209, 134, 80, 46, 37, 15, 0, 0] [755, 418, 265, 166, 97, 64, 47, 20, 3, 0] [93, 66, 40, 31, 21, 19, 16, 6, 5, 3] [18093, 8386, 4738, 2475, 1176, 595, 248, 50, 8, 3]
Degree-based Attack
# Rebuild the network and its components: get_status() removes nodes in place.
hela = get_nw(df)
hela_subs = get_subgraphs(hela)
# (node, degree) pairs ordered from highest to lowest degree
hela_degrees_std = sorted(
    hela_degrees.items(), key=lambda pair: pair[1], reverse=True
)
sorted_deg_nodes = [name for name, _degree in hela_degrees_std]
# Run the attack with nodes removed in descending order of degree.
(
    deg_lengths, deg_dict,
    deg_m1s, deg_m2s, deg_m3s, deg_m4s, deg_m5s, deg_m6s,
) = get_status(hela, hela_subs, sorted_deg_nodes)
18093 5911 2817 1823 688 341 282 199 121 66
# Per-round counts of motifs M1..M6 for the degree attack, followed by the
# per-round number of connected 4-node subsets.
print(deg_m1s, deg_m2s, deg_m3s, deg_m4s, deg_m5s, deg_m6s)
print(deg_lengths)
[4270, 1106, 435, 275, 73, 0, 0, 0, 0, 0] [8999, 3592, 1951, 1275, 552, 329, 273, 195, 119, 65] [3493, 866, 294, 182, 37, 0, 0, 0, 0, 0] [483, 189, 102, 69, 23, 12, 9, 4, 2, 1] [755, 140, 32, 20, 3, 0, 0, 0, 0, 0] [93, 18, 3, 2, 0, 0, 0, 0, 0, 0] [18093, 5911, 2817, 1823, 688, 341, 282, 199, 121, 66]
Product of Betweenness and Degree Attack
# Rebuild the network and its components: get_status() removes nodes in place.
hela = get_nw(df)
hela_subs = get_subgraphs(hela)
# (node, degree * betweenness) pairs ordered from highest to lowest product
pdb_std = sorted(pdb.items(), key=lambda pair: pair[1], reverse=True)
sorted_pdb_nodes = [name for name, _product in pdb_std]
# Run the attack with nodes removed in descending order of that product.
(
    pdb_lengths, pdb_dict,
    pdb_m1s, pdb_m2s, pdb_m3s, pdb_m4s, pdb_m5s, pdb_m6s,
) = get_status(hela, hela_subs, sorted_pdb_nodes)
18093 6887 3454 1761 885 452 147 47 3 3
# Per-round counts of motifs M1..M6 for the degree * betweenness attack,
# then the per-round number of connected 4-node subsets for all four attacks
# (random, betweenness, degree, degree * betweenness) for comparison.
print(pdb_m1s, pdb_m2s, pdb_m3s, pdb_m4s, pdb_m5s, pdb_m6s)
print(rndm_lengths)
print(betw_lengths)
print(deg_lengths)
print(pdb_lengths)
[4270, 1402, 609, 262, 133, 15, 1, 0, 0, 0] [8999, 3510, 1739, 874, 413, 230, 93, 7, 0, 0] [3493, 1375, 748, 393, 191, 113, 5, 0, 0, 0] [483, 253, 160, 98, 70, 38, 20, 15, 0, 0] [755, 300, 169, 113, 62, 42, 22, 20, 0, 0] [93, 47, 29, 21, 16, 14, 6, 5, 3, 3] [18093, 14648, 11865, 9453, 7542, 5947, 4655, 3419, 2460, 1720] [18093, 8386, 4738, 2475, 1176, 595, 248, 50, 8, 3] [18093, 5911, 2817, 1823, 688, 341, 282, 199, 121, 66] [18093, 6887, 3454, 1761, 885, 452, 147, 47, 3, 3]
# x-axis values: 100, 95, ..., 55 -- one per measurement returned by
# get_status(), which records 10 rounds with 5% of the nodes removed between
# consecutive rounds.
percents = list(range(100, 50, -5))


def _show_line_chart(title, traces):
    """Build, display and return one line chart plotted against ``percents``.

    ``traces`` is a sequence of ``(legend name, y values)`` pairs; one line is
    drawn per pair, in the given order. The x-axis is reversed so the chart
    reads from 100 percent of the nodes downwards.
    """
    chart = go.Figure()
    for label, values in traces:
        chart.add_trace(go.Scatter(x=percents, y=values, name=label))
    chart.update_layout(title=title,
                        template="plotly_white",
                        xaxis_title='Percent Nodes Left',
                        yaxis_title='Motifs Remaining')
    chart['layout']['xaxis']['autorange'] = "reversed"
    chart.show()
    return chart


_BASE_TITLE = 'Motifs Remaining at Percentage Nodes Remaining'

# One row per attack strategy:
# (legend name, total connected 4-node subsets per round, (M1..M6 series)).
_ATTACK_RESULTS = (
    ('Random Attack', rndm_lengths,
     (rndm_m1s, rndm_m2s, rndm_m3s, rndm_m4s, rndm_m5s, rndm_m6s)),
    ('Betweenness Attack', betw_lengths,
     (betw_m1s, betw_m2s, betw_m3s, betw_m4s, betw_m5s, betw_m6s)),
    ('Degree Attack', deg_lengths,
     (deg_m1s, deg_m2s, deg_m3s, deg_m4s, deg_m5s, deg_m6s)),
    ('Degree * Betweenness Attack', pdb_lengths,
     (pdb_m1s, pdb_m2s, pdb_m3s, pdb_m4s, pdb_m5s, pdb_m6s)),
)

# 1) Totals: all four attacks on one chart.
fig = _show_line_chart(
    _BASE_TITLE,
    [(label, totals) for label, totals, _ in _ATTACK_RESULTS],
)

# 2) One chart per motif type (M1..M6), comparing the four attacks.
for _motif_idx in range(6):
    fig = _show_line_chart(
        f'{_BASE_TITLE}(M{_motif_idx + 1})',
        [(label, motifs[_motif_idx]) for label, _, motifs in _ATTACK_RESULTS],
    )

# 3) One chart per attack, comparing the six motif types.
for _attack in _ATTACK_RESULTS:
    fig = _show_line_chart(
        f'{_BASE_TITLE}({_attack[0]})',
        [(f'M{number}', series)
         for number, series in enumerate(_attack[2], start=1)],
    )